import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from PIL import Image, ImageEnhance
import matplotlib.cm as cm
from PIL import Image, ImageEnhance
# Load the aggregated per-player match statistics from the local CSV export.
match_stats_csv = r"C:\Users\Justin\Downloads\final_agg_match_stats.csv"
match_df = pd.read_csv(match_stats_csv)
# Notebook preview of the loaded table.
match_df
| date | game_size | match_id | match_mode | party_size | player_assists | player_dbno | player_dist_ride | player_dist_walk | player_dmg | player_kills | player_name | player_survive_time | team_id | team_placement | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2017-11-26 | 37 | 2U4GBNA0YmnNZYkzjkfgN4ev-hXSrak_BSey_YEG6kIuDG... | tpp | 2 | 0 | 1 | 2870.72400 | 1784.847780 | 117 | 1 | SnuffIes | 18.438667 | 4 | 18 |
| 1 | 2017-11-26 | 37 | 2U4GBNA0YmnNZYkzjkfgN4ev-hXSrak_BSey_YEG6kIuDG... | tpp | 2 | 0 | 1 | 2938.40723 | 1756.079710 | 127 | 1 | Ozon3r | 18.438583 | 4 | 18 |
| 2 | 2017-11-26 | 37 | 2U4GBNA0YmnNZYkzjkfgN4ev-hXSrak_BSey_YEG6kIuDG... | tpp | 2 | 0 | 0 | 0.00000 | 224.157562 | 67 | 0 | bovize | 3.925967 | 5 | 33 |
| 3 | 2017-11-26 | 37 | 2U4GBNA0YmnNZYkzjkfgN4ev-hXSrak_BSey_YEG6kIuDG... | tpp | 2 | 0 | 0 | 0.00000 | 92.935150 | 0 | 0 | sbahn87 | 3.292550 | 5 | 33 |
| 4 | 2017-11-26 | 37 | 2U4GBNA0YmnNZYkzjkfgN4ev-hXSrak_BSey_YEG6kIuDG... | tpp | 2 | 0 | 0 | 2619.07739 | 2510.447000 | 175 | 2 | GeminiZZZ | 25.624917 | 14 | 11 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 9995 | 2017-11-20 | 29 | 2U4GBNA0Ymlm4JHKbbmQ9x9_rZbdELcOlVMjnXNbpV6MTm... | tpp | 4 | 0 | 2 | 5947.79000 | 1629.568000 | 87 | 2 | 2thikk4u | 22.191600 | 6 | 7 |
| 9996 | 2017-11-20 | 29 | 2U4GBNA0Ymlm4JHKbbmQ9x9_rZbdELcOlVMjnXNbpV6MTm... | tpp | 4 | 0 | 0 | 4421.93262 | 3212.797850 | 137 | 2 | peckerpecker | 19.357767 | 6 | 7 |
| 9997 | 2017-11-20 | 29 | 2U4GBNA0Ymlm4JHKbbmQ9x9_rZbdELcOlVMjnXNbpV6MTm... | tpp | 4 | 0 | 2 | 0.00000 | 873.056300 | 193 | 0 | Bawngfist | 6.027517 | 6 | 7 |
| 9998 | 2017-11-20 | 29 | 2U4GBNA0Ymlm4JHKbbmQ9x9_rZbdELcOlVMjnXNbpV6MTm... | tpp | 4 | 0 | 1 | 0.00000 | 744.918152 | 87 | 1 | Coots_McGoots | 8.293617 | 7 | 14 |
| 9999 | 2017-11-20 | 29 | 2U4GBNA0Ymlm4JHKbbmQ9x9_rZbdELcOlVMjnXNbpV6MTm... | tpp | 4 | 1 | 0 | 0.00000 | 670.910339 | 12 | 0 | sbwwt | 8.252450 | 7 | 14 |
10000 rows × 15 columns
# Load the per-kill event log from the local CSV export.
kill_stats_csv = r"C:\Users\Justin\Downloads\final_kill_match_stats.csv"
kill_df = pd.read_csv(kill_stats_csv)
# Notebook preview of the loaded table.
kill_df
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Down and Out | Malcolm_x | 9.0 | 496989.8 | 312569.7 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 17.250000 | Player 180 | 22.0 | 497385.4 | 331528.2 |
| 1 | M16A4 | Malcolm_x | 9.0 | 496989.8 | 312569.7 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 17.250000 | Player 181 | 22.0 | 497819.4 | 331981.3 |
| 2 | AKM | G_Berg | 7.0 | 460416.7 | 414748.8 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 23.700000 | Player 182 | 16.0 | 459817.9 | 414426.3 |
| 3 | AKM | Lukesnake17 | 20.0 | 488034.1 | 347220.3 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 20.166667 | Player 183 | 9.0 | 487444.2 | 347651.0 |
| 4 | SKS | AlooGobi | 2.0 | 501062.9 | 425078.6 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 30.300000 | Player 184 | 3.0 | 493043.4 | 434458.1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8229 | SKS | dragonfruitbamf | 2.0 | 536215.7 | 365051.5 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.366667 | Player 9996 | 6.0 | 527811.1 | 375679.6 |
| 8230 | M416 | lddoos | 3.0 | 550700.4 | 364211.8 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.200000 | Player 9997 | 6.0 | 522475.8 | 375727.8 |
| 8231 | Down and Out | Nan_P | 3.0 | 507691.3 | 388946.0 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 26.800000 | Player 9998 | 6.0 | 513487.8 | 377484.4 |
| 8232 | Grenade | brentech | 7.0 | 503235.2 | 351960.6 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 25.866667 | Player 9999 | 9.0 | 507682.2 | 354356.8 |
| 8233 | SCAR-L | SwwH | 13.0 | 447508.2 | 625632.6 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 8.683333 | Player 10000 | 20.0 | 447384.1 | 625920.0 |
8234 rows × 12 columns
Which party size wins most often in PUBG
# Pie chart: share of first-place finishes by party size.

# Keep only the rows whose team finished first. A boolean mask is used
# instead of drop-by-index so duplicate index labels cannot remove extra rows.
win_df = match_df[match_df['team_placement'] == 1]

# Split the winners by party size (1 = solo, 2 = duo, 4 = squad).
solo_df = win_df[win_df['party_size'] == 1]
double_df = win_df[win_df['party_size'] == 2]
quad_df = win_df[win_df['party_size'] == 4]

# Number of winning rows per party size. len() also returns 0 when a party
# size has no winners, where int(value_counts()) would fail on an empty Series.
# NOTE(review): these are player rows, so a winning squad contributes one row
# per member -- confirm whether teams rather than players should be counted.
solo_win = len(solo_df)
double_win = len(double_df)
quadruple_win = len(quad_df)

# Labels and values for the pie chart, in matching order.
categories = ['Solo', 'Duo', 'Squad']
values = [solo_win, double_win, quadruple_win]

# One colour per slice, sampled inside the colormap's valid [0, 1] range.
colorss = plt.cm.coolwarm(np.linspace(0.4, 0.8, len(values)))

# Pull every slice slightly away from the centre.
explode = (0.05, 0.05, 0.05)

plt.figure(figsize=(7, 7))
plt.pie(values, explode=explode, colors=colorss, shadow=True, autopct='%.4f%%')
plt.title('Probability to win (party size)')
# The legend names the slices because the pie itself carries no labels.
plt.legend(categories, loc='upper right')
plt.tight_layout()
plt.show()
Visualisation of dangerous locations in PUBG Erangel map
# Rescale the position columns from raw game units to a 0..4000 plotting scale
# (factor 4000/800000) and drop rows that contain a zero coordinate.
# NOTE(review): the factor presumably assumes the 0..800000 Erangel coordinate
# range -- confirm that kill_df holds no rows from other maps.
kill2_df = kill_df.copy()
position_data = ["victim_position_x", "victim_position_y", "killer_position_x", "killer_position_y"]

# Vectorised rescale of all four columns at once (same arithmetic as x*4000/800000).
kill2_df[position_data] = kill2_df[position_data] * 4000 / 800000

# Keep a row only if none of its four coordinates is exactly zero. The explicit
# copy makes the result an independent frame, so later column assignments do
# not trigger SettingWithCopyWarning.
has_no_zero = (kill2_df[position_data] != 0).all(axis=1)
kill2_df = kill2_df[has_no_zero].copy()

# Display the updated dataframe
kill2_df
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Down and Out | Malcolm_x | 9.0 | 2484.9490 | 1562.8485 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 17.250000 | Player 180 | 22.0 | 2486.9270 | 1657.6410 |
| 1 | M16A4 | Malcolm_x | 9.0 | 2484.9490 | 1562.8485 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 17.250000 | Player 181 | 22.0 | 2489.0970 | 1659.9065 |
| 2 | AKM | G_Berg | 7.0 | 2302.0835 | 2073.7440 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 23.700000 | Player 182 | 16.0 | 2299.0895 | 2072.1315 |
| 3 | AKM | Lukesnake17 | 20.0 | 2440.1705 | 1736.1015 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 20.166667 | Player 183 | 9.0 | 2437.2210 | 1738.2550 |
| 4 | SKS | AlooGobi | 2.0 | 2505.3145 | 2125.3930 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 30.300000 | Player 184 | 3.0 | 2465.2170 | 2172.2905 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8229 | SKS | dragonfruitbamf | 2.0 | 2681.0785 | 1825.2575 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.366667 | Player 9996 | 6.0 | 2639.0555 | 1878.3980 |
| 8230 | M416 | lddoos | 3.0 | 2753.5020 | 1821.0590 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.200000 | Player 9997 | 6.0 | 2612.3790 | 1878.6390 |
| 8231 | Down and Out | Nan_P | 3.0 | 2538.4565 | 1944.7300 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 26.800000 | Player 9998 | 6.0 | 2567.4390 | 1887.4220 |
| 8232 | Grenade | brentech | 7.0 | 2516.1760 | 1759.8030 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 25.866667 | Player 9999 | 9.0 | 2538.4110 | 1771.7840 |
| 8233 | SCAR-L | SwwH | 13.0 | 2237.5410 | 3128.1630 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 8.683333 | Player 10000 | 20.0 | 2236.9205 | 3129.6000 |
7811 rows × 12 columns
# Data reduction by row filtering: keep only the kills whose "time" value is
# below 5 (the earliest deaths of each match; per the original note the unit
# is minutes).
is_early_kill = kill2_df["time"].lt(5)
kill_sample = kill2_df.loc[is_early_kill]
# Display the new dataframe
kill_sample
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6 | Down and Out | Snowzcone | 44.0 | 2330.8655 | 3211.4760 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 1.533333 | Player 186 | 49.0 | 2331.0745 | 3205.2575 |
| 9 | S1897 | MaelstromPhoenix | 14.0 | 2232.7690 | 3107.5790 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 1.750000 | Player 189 | 46.0 | 2233.4915 | 3105.1705 |
| 12 | P92 | Homebrw | 9.0 | 1773.6730 | 1985.1985 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 4.650000 | Player 192 | 38.0 | 1768.3455 | 1987.7230 |
| 13 | Micro UZI | NoMersee | 26.0 | 1689.2995 | 803.0060 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 2.766667 | Player 194 | 42.0 | 1696.7280 | 801.3480 |
| 17 | M416 | biubiu_RNG | 43.0 | 2255.2645 | 3148.2005 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 2.633333 | Player 198 | 37.0 | 2253.9605 | 3147.8755 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8213 | Down and Out | GreatPandaKing | 24.0 | 3574.5335 | 1651.7530 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 3.933333 | Player 9980 | 26.0 | 3556.4780 | 1639.7185 |
| 8215 | P1911 | Juarezneverfalls | 26.0 | 3558.7235 | 1635.6795 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 3.683333 | Player 9982 | 24.0 | 3560.2685 | 1636.8060 |
| 8217 | Tommy Gun | Go_getter | 24.0 | 3561.1820 | 1624.1475 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 2.316667 | Player 9984 | 26.0 | 3556.2920 | 1634.1975 |
| 8219 | UMP9 | Hking_909 | 24.0 | 3562.6530 | 1636.0950 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 4.033333 | Player 9986 | 26.0 | 3559.8120 | 1641.4650 |
| 8223 | Tommy Gun | Go_getter | 24.0 | 3564.9865 | 1642.8575 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 3.450000 | Player 9990 | 26.0 | 3569.9880 | 1647.2005 |
2488 rows × 12 columns
# Heat map of early-game victim positions drawn over the Erangel map.

# Positions were rescaled with x*4000/800000, so the whole map spans 0..4000
# on both axes.
# NOTE(review): assumes raw coordinates run 0..800000 with the origin in the
# top-left corner of the map -- confirm against the dataset description.
MAP_EXTENT = 4000

# Load the Erangel map image and lighten it slightly; values above 1 brighten.
brightness_factor = 1.1
with Image.open(r"C:\Users\Justin\Downloads\ERANGEL.jpg") as image:
    enhancer = ImageEnhance.Brightness(image)
    brightened_image = enhancer.enhance(brightness_factor)

# Single 15x15 inch axes for the plot.
fig, ax = plt.subplots(1, 1, figsize=(15, 15))

# 2-D kernel density estimate of where victims died.
plot = sns.kdeplot(data=kill_sample, x="victim_position_x", y="victim_position_y",
                   levels=100, cmap=cm.Reds, alpha=0.9, ax=ax)

# Draw the map across the full coordinate range, not the KDE's own axis limits,
# with y growing downwards so image pixels and kill coordinates line up.
# aspect='equal' keeps the square map undistorted.
ax.imshow(brightened_image, extent=[0, MAP_EXTENT, MAP_EXTENT, 0], aspect='equal')
ax.set_xlim(0, MAP_EXTENT)
ax.set_ylim(MAP_EXTENT, 0)

# Axis labels and title.
ax.set_xlabel('x-coordinate', fontsize=18)
ax.set_ylabel('y-coordinate', fontsize=18)
ax.set_title('Most dangerous locations', fontsize=20)
plt.show()
Top 15 guns by kill count in PUBG
# Gun names to keep, grouped per line by weapon class. Spelling must match the
# 'killed_by' column exactly: the kill log writes "Kar98k" with a lower-case k,
# so the former "Kar98K" entry silently dropped every Kar98k kill.
guns = [
    'AKM', 'M16A4', 'SCAR-L', 'M416', 'Groza', 'M762',
    'Kar98k', 'M24', 'AWM',
    'SKS', 'VSS', 'Mini 14', 'Mk14', 'SLR',
    'Micro UZI', 'UMP9', 'Vector', 'Tommy Gun',
    'S686', 'S1897', 'S12K',
    'M249', 'DP-28',
    'P92', 'P1911', 'R1895', 'P18C',
]
# Keep only the kills made with one of the listed guns (this drops entries
# such as "Down and Out" or "Grenade").
guns_df = kill_df[kill_df['killed_by'].isin(guns)]
# Show the filtered DataFrame
guns_df
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | M16A4 | Malcolm_x | 9.0 | 496989.8 | 312569.7 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 17.250000 | Player 181 | 22.0 | 497819.4 | 331981.3 |
| 2 | AKM | G_Berg | 7.0 | 460416.7 | 414748.8 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 23.700000 | Player 182 | 16.0 | 459817.9 | 414426.3 |
| 3 | AKM | Lukesnake17 | 20.0 | 488034.1 | 347220.3 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 20.166667 | Player 183 | 9.0 | 487444.2 | 347651.0 |
| 4 | SKS | AlooGobi | 2.0 | 501062.9 | 425078.6 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 30.300000 | Player 184 | 3.0 | 493043.4 | 434458.1 |
| 5 | M416 | Powfa | 3.0 | 495501.2 | 429826.2 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 29.433333 | Player 185 | 6.0 | 495755.7 | 438322.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8226 | AKM | Addictted | 2.0 | 535211.4 | 364872.2 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 29.000000 | Player 9993 | 3.0 | 535258.8 | 365134.4 |
| 8228 | SCAR-L | SurpriseMtheFker | 13.0 | 435366.4 | 625140.6 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 7.250000 | Player 9995 | 20.0 | 447501.6 | 628886.9 |
| 8229 | SKS | dragonfruitbamf | 2.0 | 536215.7 | 365051.5 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.366667 | Player 9996 | 6.0 | 527811.1 | 375679.6 |
| 8230 | M416 | lddoos | 3.0 | 550700.4 | 364211.8 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 27.200000 | Player 9997 | 6.0 | 522475.8 | 375727.8 |
| 8233 | SCAR-L | SwwH | 13.0 | 447508.2 | 625632.6 | ERANGEL | 2U4GBNA0YmnP7JxGpV6xH481nTM-AN2Ig_D_m3DRTNjINB... | 8.683333 | Player 10000 | 20.0 | 447384.1 | 625920.0 |
5370 rows × 12 columns
# Horizontal bar chart of the 15 guns with the most kills.

# value_counts() sorts by frequency, so the first 15 entries are the top 15.
# They are re-sorted ascending so the largest bar ends up at the top.
top_weapon_kills = guns_df['killed_by'].value_counts().head(15).sort_values(ascending=True)

# One gradient colour per bar, sampled inside the colormap's valid [0, 1]
# range (inputs above 1 are clamped to the darkest shade).
colors = plt.cm.Oranges(np.linspace(0.5, 1.0, len(top_weapon_kills)))

# zorder=2 draws the bars above the grid lines.
top_weapon_kills.plot.barh(figsize=(10, 7), color=colors, zorder=2)

# Title, axis labels and a faint background grid (alpha sets its transparency).
plt.title('Top 15 Weapons by Kill Count', fontsize=20)
plt.xlabel('Kill Count', fontsize=15)
plt.ylabel('Weapon', fontsize=15)
plt.grid(zorder=1, alpha=0.4)
# Kill count per gun, most frequent first.
counts = guns_df['killed_by'].value_counts()
# The 15 most used guns, explicitly ordered from most to fewest kills.
top_15 = counts.head(15)
sorted_top_15 = top_15.sort_values(ascending=False)
# Two-column table: the gun name moves from the index into 'Weapons' and the
# kill count becomes 'Counts'.
top15_df = sorted_top_15.rename_axis('Weapons').reset_index(name='Counts')
# Weapon names grouped by gun type (key: gun type, value: list of weapon names).
# "Kar98k" is spelled the way the kill log's 'killed_by' column spells it.
# NOTE(review): Kar98k, M24 and AWM are usually described as bolt-action rather
# than lever-action rifles -- confirm the intended label before renaming the key.
gun_categories = {
    'Assault Rifles': ['AKM', 'M16A4', 'SCAR-L', 'M416', 'Groza', 'M762'],
    'Lever Action Sniper Rifles': ['Kar98k', 'M24', 'AWM'],
    'Automatic Sniper Rifles': ['SKS', 'VSS', 'Mini 14', 'Mk14', 'SLR'],
    'SubMachine Guns': ['Micro UZI', 'UMP9', 'Vector', 'Tommy Gun'],
    'Shotguns': ['S686', 'S1897', 'S12K'],
    'Light Machine Guns': ['M249', 'DP-28'],
    'Pistols': ['P92', 'P1911', 'R1895', 'P18C'],
}


def map_gun_type(weapon):
    """Return the gun type that `weapon` belongs to, or None if it is unknown.

    The name is compared case-insensitively against every list in
    `gun_categories`, so spelling variants such as "Kar98K" and "Kar98k"
    resolve to the same type. Non-string input (for example NaN) yields None.
    """
    if not isinstance(weapon, str):
        return None
    wanted = weapon.casefold()
    for key, values in gun_categories.items():
        if any(wanted == name.casefold() for name in values):
            return key
    return None
# Look up the gun type of every weapon in the table and store it in a new column.
top15_df["Gun type"] = top15_df["Weapons"].apply(map_gun_type)
# Weapon names grouped by ammo type (key: ammo type, value: list of weapon names).
# Every value is a list: a bare string such as 'AWM' would make the membership
# test a substring match ('AW' in 'AWM' is True).
# NOTE(review): the assignments follow the original table; the calibre of some
# guns (e.g. UMP9, Vector) may differ between game versions -- confirm against
# the version this data was recorded in.
ammo_categories = {
    '.300 Magnum Ammo': ['AWM'],
    '.45 ACP Ammo': ['P1911', 'Tommy Gun', 'UMP9'],
    '12 Gauge Ammo': ['S1897', 'S686', 'S12K'],
    '5.56mm Ammo': ['M16A4', 'M249', 'M416', 'SCAR-L', 'Mini 14'],
    '7.62mm Ammo': ['AKM', 'DP-28', 'Groza', 'M762', 'Kar98k', 'M24', 'Mk14', 'R1895', 'SKS', 'SLR'],
    '9mm Ammo': ['Micro UZI', 'P92', 'VSS', 'P18C', 'Vector'],
}


def map_ammo_type(weapon):
    """Return the ammo type used by `weapon`, or None if it is unknown.

    The name is compared case-insensitively against every list in
    `ammo_categories`. Non-string input (for example NaN) yields None.
    """
    if not isinstance(weapon, str):
        return None
    wanted = weapon.casefold()
    for key, values in ammo_categories.items():
        if any(wanted == name.casefold() for name in values):
            return key
    return None
# Look up the ammo type of every weapon in the table and store it in a new column.
top15_df["Ammo type"] = top15_df["Weapons"].apply(map_ammo_type)
# Number the rows 1..N (instead of 0..N-1) so the index reads as a rank.
top15_df.index = pd.RangeIndex(start=1, stop=len(top15_df) + 1)
# Show the final list of Top 15 Weapons, followed by Gun and Ammo type
top15_df
| Weapons | Counts | Gun type | Ammo type | |
|---|---|---|---|---|
| 1 | M416 | 845 | Assault Rifles | 5.56mm Ammo |
| 2 | SCAR-L | 755 | Assault Rifles | 5.56mm Ammo |
| 3 | AKM | 689 | Assault Rifles | 7.62mm Ammo |
| 4 | M16A4 | 678 | Assault Rifles | 5.56mm Ammo |
| 5 | UMP9 | 459 | SubMachine Guns | .45 ACP Ammo |
| 6 | S1897 | 361 | Shotguns | 12 Gauge Ammo |
| 7 | Mini 14 | 270 | Automatic Snifer Rifles | 5.56mm Ammo |
| 8 | SKS | 214 | Automatic Snifer Rifles | 7.62mm Ammo |
| 9 | S686 | 212 | Shotguns | 12 Gauge Ammo |
| 10 | S12K | 201 | Shotguns | 12 Gauge Ammo |
| 11 | Micro UZI | 166 | SubMachine Guns | 9mm Ammo |
| 12 | P1911 | 111 | Pistols | .45 ACP Ammo |
| 13 | Tommy Gun | 98 | SubMachine Guns | .45 ACP Ammo |
| 14 | P92 | 96 | Pistols | 9mm Ammo |
| 15 | Vector | 45 | SubMachine Guns | 9mm Ammo |
Relationship between Number of Assists & Probability to Win
# Bar chart: share of winning player rows for each number of assists.

# Flag every player row whose team finished first.
match_df['winner'] = match_df['team_placement'] == 1

# Solo matches (party_size == 1) are excluded here.
assists_df = match_df.loc[match_df['party_size'] != 1, ['player_assists', 'winner']]

# The mean of the boolean 'winner' column within each assist count is the
# proportion of winning rows in that group.
win_rate_by_assists = assists_df.groupby('player_assists').winner.mean()

# One gradient colour per bar, sampled inside the colormap's valid [0, 1]
# range (inputs above 1 are clamped to the darkest shade).
colors = plt.cm.Oranges(np.linspace(0.3, 1.0, len(win_rate_by_assists)))

# rot=0 keeps the tick labels horizontal; zorder=2 draws bars above the grid.
win_rate_by_assists.plot.bar(rot=0, figsize=(10, 5), color=colors, zorder=2)

# Title, axis labels and horizontal grid lines behind the bars.
plt.title("Relationship between Number of Assists & Probability to Win", fontsize=18)
plt.xlabel("Number of assists by player", fontsize=14)
plt.ylabel("Probability to Win", fontsize=14)
plt.grid(axis='y', zorder=1)
1. Relationship between Driving & Probability to Win
# Horizontal bar chart: share of winning player rows for drivers vs non-drivers.

# Flag every player row whose team finished first.
match_df['winner'] = match_df['team_placement'] == 1
# Flag every player row with a non-zero riding distance.
match_df['drove'] = match_df['player_dist_ride'] != 0

# The mean of the boolean 'winner' column per group is the proportion of
# winning rows among drivers and among non-drivers.
drive_win_rate = match_df.groupby('drove').winner.mean()

# Label each bar from its own group key, so the text stays correct even if
# only one of the two groups is present in the data.
drive_win_rate = drive_win_rate.rename({True: "Drive", False: "Don't drive"})

# zorder=2 draws the bars above the grid lines.
drive_win_rate.plot.barh(figsize=(10, 5), color="Orange", zorder=2)

# Title, axis labels and a faint background grid (alpha sets its transparency).
plt.title("Relationship between Driving & Probability to Win", fontsize=18)
plt.xlabel("Probability to Win", fontsize=14)
plt.ylabel("Whether players drive or not", fontsize=14)
plt.grid(zorder=1, alpha=0.5)
2. Relationship between Driving Distance & Probability to Win
# Bar chart: share of winning player rows per 1000-unit band of riding distance.

# Keep rows with a riding distance below 10000 and only the two columns needed.
# .copy() makes this an independent frame so the column added below does not
# trigger SettingWithCopyWarning.
distance_df = match_df.loc[match_df['player_dist_ride'] < 10000, ['player_dist_ride', 'winner']].copy()

# Labels for the ten distance bands.
label_distance = ["0-1k", "1k-2k", "2k-3k", "3k-4k", "4k-5k", "5k-6k", "6k-7k", "7k-8k", "8k-9k", "9k-10k"]

# Explicit edges at 0, 1000, ..., 10000 so each label really names its band.
# Passing bins=10 would instead split the observed min..max range into ten
# equal parts. right=False makes bands [0, 1000), [1000, 2000), ..., which
# matches the "< 10000" filter above.
distance_bins = np.arange(0, 10001, 1000)
distance_df['drive_distance'] = pd.cut(distance_df['player_dist_ride'], bins=distance_bins,
                                       right=False, labels=label_distance)

# The mean of the boolean 'winner' column per band is the proportion of winning
# rows. observed=False keeps empty bands on the axis.
win_rate_by_distance = distance_df.groupby('drive_distance', observed=False).winner.mean()

# One gradient colour per bar, sampled inside the colormap's valid [0, 1]
# range (inputs above 1 are clamped to the darkest shade).
colors = plt.cm.Oranges(np.linspace(0.4, 1.0, len(win_rate_by_distance)))

# rot=0 keeps the tick labels horizontal; zorder=2 draws bars above the grid.
win_rate_by_distance.plot.bar(rot=0, figsize=(13, 5), color=colors, zorder=2)

# Title, axis labels and a faint background grid (alpha sets its transparency).
plt.title("Relationship between Driving Distance & Probability to Win", fontsize=18)
plt.xlabel("Driving distance", fontsize=14)
plt.ylabel("Probability to win", fontsize=14)
plt.grid(zorder=1, alpha=0.6)
Visualisation of the final bluezone locations
# Dimensions of the kill log as (rows, columns).
kill_df.shape
(8234, 12)
# Dimensions of the match table as (rows, columns); this includes the
# 'winner' and 'drove' columns added to match_df earlier in the notebook.
match_df.shape
(10000, 17)
# Collect the deaths of second-placed players; the sections below use where
# they died to locate the final bluezone.

# Rows of players whose team finished first.
team_win = match_df[match_df["team_placement"] == 1]

# Last player standing of each winning team: the row(s) whose survive time
# equals the maximum within their match. transform('max') broadcasts the
# per-match maximum back onto every row, which avoids groupby.apply having to
# operate on the grouping column.
longest_survival = team_win.groupby('match_id')['player_survive_time'].transform('max')
grouped = team_win[team_win['player_survive_time'] == longest_survival]

# Kill-log rows that belong to the matches selected above.
deaths_solo = kill_df[kill_df['match_id'].isin(grouped['match_id'])]

# Victims that finished second, with incomplete rows removed. .copy() makes
# this an independent frame so later column assignments are safe.
df_second = deaths_solo[deaths_solo['victim_placement'] == 2].dropna().copy()

# display the new dataframe
df_second
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 37 | M416 | DevilBlood35 | 1.0 | 504913.5 | 441667.2 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 31.416667 | Player 219 | 2.0 | 503132.6 | 433288.4 |
| 102 | SCAR-L | Illmy0111 | 1.0 | 180234.1 | 512721.8 | ERANGEL | 2U4GBNA0YmlxiifTmjHuCGJhLNLL-lhlH9TQh47o-9IZVJ... | 30.900000 | Player 285 | 2.0 | 181216.5 | 513549.2 |
| 175 | Down and Out | Illmy0111 | 1.0 | 174635.6 | 510872.2 | ERANGEL | 2U4GBNA0YmlxiifTmjHuCGJhLNLL-lhlH9TQh47o-9IZVJ... | 27.333333 | Player 360 | 2.0 | 166706.2 | 538946.3 |
| 206 | AKM | VSUPciwei | 1.0 | 396662.7 | 307742.9 | ERANGEL | 2U4GBNA0YmmLlZbT02zMNt2JlWQ2eYFKRG02TGemIK1RM2... | 31.066667 | Player 392 | 2.0 | 396531.8 | 304991.7 |
| 248 | Down and Out | VSUPciwei | 1.0 | 396289.3 | 308352.7 | ERANGEL | 2U4GBNA0YmmLlZbT02zMNt2JlWQ2eYFKRG02TGemIK1RM2... | 31.066667 | Player 434 | 2.0 | 394014.9 | 303887.9 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7827 | M416 | CNM_10086 | 1.0 | 321362.7 | 151641.6 | ERANGEL | 2U4GBNA0YmmkSI66uqmO1tdpb1le9dHphK-SEiVW9bCyMM... | 31.416667 | Player 9406 | 2.0 | 320446.6 | 145273.4 |
| 7998 | Grenade | gouxiongwang | 1.0 | 501849.9 | 296173.3 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 32.666667 | Player 9582 | 2.0 | 502382.6 | 297894.0 |
| 8020 | Kar98k | R_eborn | 3.0 | 505507.1 | 293815.9 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 30.083333 | Player 9604 | 2.0 | 492292.8 | 302644.7 |
| 8029 | Kar98k | R_eborn | 3.0 | 505503.1 | 293733.7 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 29.650000 | Player 9613 | 2.0 | 495596.8 | 306128.8 |
| 8030 | Down and Out | R_eborn | 3.0 | 505494.6 | 293797.7 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 31.166667 | Player 9614 | 2.0 | 494745.7 | 304383.4 |
202 rows × 12 columns
# Dimensions of the second-place deaths table as (rows, columns).
df_second.shape
(202, 12)
# Rescale the position columns from raw game units to a 0..4000 plotting scale
# (factor 4000/800000) and drop rows that contain a zero coordinate.
# NOTE(review): the factor presumably assumes the 0..800000 Erangel coordinate
# range -- confirm that df_second holds no rows from other maps.
position_data = ["victim_position_x", "victim_position_y", "killer_position_x", "killer_position_y"]

# Work on an independent copy so the column assignment below does not trigger
# SettingWithCopyWarning.
df_second = df_second.copy()

# Vectorised rescale of all four columns at once (same arithmetic as x*4000/800000).
df_second[position_data] = df_second[position_data] * 4000 / 800000

# Keep a row only if none of its four coordinates is exactly zero.
has_no_zero = (df_second[position_data] != 0).all(axis=1)
df_second = df_second[has_no_zero].copy()

# display the updated dataframe
df_second
| killed_by | killer_name | killer_placement | killer_position_x | killer_position_y | map | match_id | time | victim_name | victim_placement | victim_position_x | victim_position_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 37 | M416 | DevilBlood35 | 1.0 | 2524.5675 | 2208.3360 | ERANGEL | 2U4GBNA0YmnRe95wOy7kuweIkuZo5Roa0WjNZsgboi2gzz... | 31.416667 | Player 219 | 2.0 | 2515.6630 | 2166.4420 |
| 102 | SCAR-L | Illmy0111 | 1.0 | 901.1705 | 2563.6090 | ERANGEL | 2U4GBNA0YmlxiifTmjHuCGJhLNLL-lhlH9TQh47o-9IZVJ... | 30.900000 | Player 285 | 2.0 | 906.0825 | 2567.7460 |
| 175 | Down and Out | Illmy0111 | 1.0 | 873.1780 | 2554.3610 | ERANGEL | 2U4GBNA0YmlxiifTmjHuCGJhLNLL-lhlH9TQh47o-9IZVJ... | 27.333333 | Player 360 | 2.0 | 833.5310 | 2694.7315 |
| 206 | AKM | VSUPciwei | 1.0 | 1983.3135 | 1538.7145 | ERANGEL | 2U4GBNA0YmmLlZbT02zMNt2JlWQ2eYFKRG02TGemIK1RM2... | 31.066667 | Player 392 | 2.0 | 1982.6590 | 1524.9585 |
| 248 | Down and Out | VSUPciwei | 1.0 | 1981.4465 | 1541.7635 | ERANGEL | 2U4GBNA0YmmLlZbT02zMNt2JlWQ2eYFKRG02TGemIK1RM2... | 31.066667 | Player 434 | 2.0 | 1970.0745 | 1519.4395 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 7827 | M416 | CNM_10086 | 1.0 | 1606.8135 | 758.2080 | ERANGEL | 2U4GBNA0YmmkSI66uqmO1tdpb1le9dHphK-SEiVW9bCyMM... | 31.416667 | Player 9406 | 2.0 | 1602.2330 | 726.3670 |
| 7998 | Grenade | gouxiongwang | 1.0 | 2509.2495 | 1480.8665 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 32.666667 | Player 9582 | 2.0 | 2511.9130 | 1489.4700 |
| 8020 | Kar98k | R_eborn | 3.0 | 2527.5355 | 1469.0795 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 30.083333 | Player 9604 | 2.0 | 2461.4640 | 1513.2235 |
| 8029 | Kar98k | R_eborn | 3.0 | 2527.5155 | 1468.6685 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 29.650000 | Player 9613 | 2.0 | 2477.9840 | 1530.6440 |
| 8030 | Down and Out | R_eborn | 3.0 | 2527.4730 | 1468.9885 | ERANGEL | 2U4GBNA0YmlAIXAxa8UNSqKMPaMblcwdWLI6-w44_RBSbY... | 31.166667 | Player 9614 | 2.0 | 2473.7285 | 1521.9170 |
198 rows × 12 columns
# Density map of where second-placed players died, used as a proxy for the
# final bluezone, drawn over the Erangel map.

# 'talk' context: larger fonts and thicker lines for presentations.
# This changes seaborn's global settings for every later plot.
sns.set_context('talk')

# Positions were rescaled with x*4000/800000, so the whole map spans 0..4000
# on both axes.
# NOTE(review): assumes raw coordinates run 0..800000 with the origin in the
# top-left corner of the map -- confirm against the dataset description.
MAP_EXTENT = 4000

# Load the Erangel map image and lighten it; values above 1 brighten.
brightness_factor = 1.3
with Image.open(r"C:\Users\Justin\Downloads\ERANGEL.jpg") as image:
    enhancer = ImageEnhance.Brightness(image)
    brightened_image = enhancer.enhance(brightness_factor)

# Single 15x15 inch axes for the plot.
fig, ax = plt.subplots(1, 1, figsize=(15, 15))

# Filled 2-D kernel density estimate of the victim positions.
# fill=True replaces the deprecated shade=True; alpha=0.7 keeps the map
# visible underneath.
plot = sns.kdeplot(data=df_second, x="victim_position_x", y="victim_position_y",
                   cmap="Blues", alpha=0.7, fill=True, ax=ax)

# Soften the outer density levels and strengthen an inner one.
# NOTE(review): this relies on each contour level being a separate entry of
# ax.collections. The guard skips the tweaks when fewer than nine entries
# exist, so it cannot raise IndexError -- confirm against the installed
# matplotlib version.
density_layers = plot.collections
if len(density_layers) > 8:
    density_layers[8].set_alpha(0.8)
    density_layers[0].set_alpha(0.3)
    density_layers[1].set_alpha(0.5)
    # (0.5, 0.5, 1) is a light blue in RGB, less saturated than the colormap.
    density_layers[0].set_facecolor((0.5, 0.5, 1))

# Draw the map across the full coordinate range, not the KDE's own axis limits,
# with y growing downwards so image pixels and kill coordinates line up.
# aspect='equal' keeps the square map undistorted.
ax.imshow(brightened_image, extent=[0, MAP_EXTENT, MAP_EXTENT, 0], aspect='equal')
ax.set_xlim(0, MAP_EXTENT)
ax.set_ylim(MAP_EXTENT, 0)

# Title and axis labels.
ax.set_xlabel('x-coordinate', fontsize=18)
ax.set_ylabel('y-coordinate', fontsize=18)
ax.set_title('Final Bluezone locations', fontsize=20)

# Display the result. Higher-density regions appear in more intense colours,
# lower-density regions in lighter ones.
plt.show()
C:\Users\Justin\AppData\Local\Temp\ipykernel_11832\3161281345.py:16: FutureWarning: `shade` is now deprecated in favor of `fill`; setting `fill=True`. This will become an error in seaborn v0.14.0; please update your code. plot = sns.kdeplot(data=df_second, x="victim_position_x", y="victim_position_y", cmap="Blues", alpha=0.7, shade=True, ax=ax)